/******************************************************************************

 @File         PVRTDecompress.cpp

 @Title

 @Copyright    Copyright (C) 2000 - 2008 by Imagination Technologies Limited.

 @Platform     ANSI compatible

 @Description  PVRTC Texture Decompression.

 ******************************************************************************/
#include <stdlib.h>
#include <stdio.h>
#include <limits.h>
#include <math.h>
#include <string.h>
#include <assert.h>
#include <cstdint>
#include "pvr.h"

/*
 Generic minimum / maximum / clamp helpers.
 These are function-like macros: an argument can be evaluated more than once,
 so never pass an expression with side effects (e.g. i++).
 */
#define PVRT_MIN(a,b)            (((a) < (b)) ? (a) : (b))
#define PVRT_MAX(a,b)            (((a) > (b)) ? (a) : (b))
// Restricts x to the inclusive range [l, h]
#define PVRT_CLAMP(x, l, h)      (PVRT_MIN((h), PVRT_MAX((x), (l))))

/*****************************************************************************
 * defines and consts
 *****************************************************************************/
// Modulation code that GetModulationValue treats as "punch-through" in the 4bpp PT encoding
#define PT_INDEX (2)    // The Punch-through index

// Block height in pixels
#define BLK_Y_SIZE     (4) // always 4 for all 2D block types

#define BLK_X_MAX    (8)    // Max X dimension for blocks

// Block width in pixels: 8 for 2bpp data, 4 for 4bpp data
#define BLK_X_2BPP    (8) // dimensions for the two formats
#define BLK_X_4BPP    (4)

/*
 Wraps Val into [0, Size) by masking with Size-1. This only behaves like a
 modulo when Size is a power of two; negative values wrap round to the top of
 the range.
 */
#define WRAP_COORD(Val, Size) ((Val) & ((Size)-1))

// Non-zero when X is an integer power of two (see util_number_is_power_2)
#define POWER_OF_2(X)   util_number_is_power_2(X)

/*
 Define an expression to either wrap or clamp large or small vals to the
 legal coordinate range.

 When AssumeImageTiles is non-zero the value is wrapped with WRAP_COORD
 (Size must be a power of two), otherwise it is clamped to [0, Size-1].
 Val and Size may be evaluated more than once.
 */
#define LIMIT_COORD(Val, Size, AssumeImageTiles) \
((AssumeImageTiles)? WRAP_COORD((Val), (Size)): PVRT_CLAMP((Val), 0, (Size)-1))

/*****************************************************************************
 * Useful typedefs
 *****************************************************************************/

// Short aliases for the fixed-width unsigned types used by the decoder:
// U32 for the packed block words, U8 for the output colour channels.
typedef uint32_t U32;
typedef uint8_t U8;

/***********************************************************
 DECOMPRESSION ROUTINES
 ************************************************************/

/*!***********************************************************************
 @Struct    AMTC_BLOCK_STRUCT
 @Brief     One compressed PVRTC block, stored as two 32 bit words.
            As read by the unpack routines in this file:
              PackedData[0]  the block's modulation bits
              PackedData[1]  bit 0      modulation mode flag
                             bits 1-15  colour A
                             bits 16-31 colour B
 *************************************************************************/
typedef struct
{
    // Uses 64 bits per block
    U32 PackedData[2];
}AMTC_BLOCK_STRUCT;


// Forward declaration of the decoder proper; the definition is further down
// this file. Needed because PVRTDecompressPVRTC calls it before it is defined.
static void PVRDecompress(AMTC_BLOCK_STRUCT *pCompressedData,
                       const bool Do2bitMode,
                       const int XDim,
                       const int YDim,
                       const int AssumeImageTiles,
                       unsigned char* pResultImage);

/*!***********************************************************************
 @Function        PVRTDecompressPVRTC
 @Input            pCompressedData The PVRTC texture data to decompress
 @Input            Do2bitMode Signifies whether the data is PVRTC2 or PVRTC4
 @Input            XDim X dimension of the texture
 @Input            YDim Y dimension of the texture
 @Modified        pResultImage The decompressed texture data
 @Description    Decompresses PVRTC to RGBA 8888
 *************************************************************************/
int PVRTDecompressPVRTC(const void * const pCompressedData,const int XDim,const int YDim, void *pDestData,const bool Do2bitMode)
{
    PVRDecompress((AMTC_BLOCK_STRUCT*)pCompressedData,Do2bitMode,XDim,YDim,1,(unsigned char*)pDestData);

    return XDim*YDim/2;
}

/*!***********************************************************************
 @Function        util_number_is_power_2
 @Input        input The value to test
 @Returns        1 if input is an integer power of two (1, 2, 4, 8, ...),
                 otherwise 0. Zero is not a power of two, so it yields 0.
 @Description    Tests whether exactly one bit of the value is set.
 *************************************************************************/
int util_number_is_power_2( unsigned  input )
{
    if( input == 0 )
        return 0;

    // Clearing the lowest set bit leaves nothing behind only when a single bit was set
    return ( (input & (input - 1)) == 0 ) ? 1 : 0;
}


/*!***********************************************************************
 @Function        Unpack5554Colour
 @Input            pBlock The compressed block to read the colours from
 @Modified        ABColours Receives the two colours: ABColours[0] is colour A,
                  ABColours[1] is colour B. Each is {R, G, B, A} with 5 bit
                  R, G and B and 4 bit alpha.
 @Description    Given a block, extract the colour information and convert
                 both colours to the 5554 format. Each colour is stored
                 either as opaque (top bit set) or translucent (top bit
                 clear, lower colour precision plus 3 bits of alpha); the
                 lower precision fields are widened by bit replication.
 *************************************************************************/
static void Unpack5554Colour(const AMTC_BLOCK_STRUCT *pBlock,
                             int   ABColours[2][4])
{
    U32 RawBits[2];

    int i;

    /*
     Extract A and B. Bit 0 of the word is the modulation mode flag, not
     colour data, so it is masked out of A; A therefore always carries one
     bit less blue than B.
     */
    RawBits[0] = pBlock->PackedData[1] & (0xFFFE); // 15 bits (shifted up by one)
    RawBits[1] = pBlock->PackedData[1] >> 16;       // 16 bits

    // step through both colours
    for(i = 0; i < 2; i++)
    {
        // If completely opaque
        if(RawBits[i] & (1<<15))
        {
            // Extract R and G (both 5 bit)
            ABColours[i][0] = (RawBits[i] >> 10) & 0x1F;
            ABColours[i][1] = (RawBits[i] >>  5) & 0x1F;

            /*
             The precision of Blue depends on  A or B. If A then we need to
             replicate the top bit to get 5 bits in total
             */
            ABColours[i][2] = RawBits[i] & 0x1F;
            if(i==0)
            {
                ABColours[0][2] |= ABColours[0][2] >> 4;
            }

            // set 4bit alpha fully on...
            ABColours[i][3] = 0xF;
        }
        else // Else if colour has variable translucency
        {
            /*
             Extract R and G (both 4 bit).
             (Leave a space on the end for the replication of bits)
             */
            ABColours[i][0] = (RawBits[i] >>  (8-1)) & 0x1E;
            ABColours[i][1] = (RawBits[i] >>  (4-1)) & 0x1E;

            // replicate bits to truly expand to 5 bits
            ABColours[i][0] |= ABColours[i][0] >> 4;
            ABColours[i][1] |= ABColours[i][1] >> 4;

            // grab the 3(+padding) or 4 bits of blue and add an extra padding bit
            ABColours[i][2] = (RawBits[i] & 0xF) << 1;

            /*
             expand from 3 to 5 bits if this is from colour A, or 4 to 5 bits if from
             colour B.

             The replication must be applied to the colour being unpacked
             (index i). Writing to index 0 while handling colour B would
             leave B's blue unexpanded and disturb A's already finished blue.
             */
            if(i==0)
            {
                ABColours[i][2] |= ABColours[i][2] >> 3;
            }
            else
            {
                ABColours[i][2] |= ABColours[i][2] >> 4;
            }

            // Set the alpha bits to be 3 + a zero on the end
            ABColours[i][3] = (RawBits[i] >> 11) & 0xE;
        }
    }
}

/*!***********************************************************************
 @Function        UnpackModulations
 @Input            pBlock
 @Input            Do2bitMode
 @Input            ModulationVals
 @Input            ModulationModes
 @Input            StartX
 @Input            StartY
 @Description    Given the block and the texture type and it's relative
 position in the 2x2 group of blocks, extract the bit
 patterns for the fully defined pixels.
 *************************************************************************/
static void    UnpackModulations(const AMTC_BLOCK_STRUCT *pBlock,
                              const int Do2bitMode,
                              int ModulationVals[8][16],
                              int ModulationModes[8][16],
                              int StartX,
                              int StartY)
{
    int BlockModMode;
    U32 ModulationBits;

    int x, y;

    BlockModMode= pBlock->PackedData[1] & 1;
    ModulationBits    = pBlock->PackedData[0];

    // if it's in an interpolated mode
    if(Do2bitMode && BlockModMode)
    {
        /*
         run through all the pixels in the block. Note we can now treat all the
         "stored" values as if they have 2bits (even when they didn't!)
         */
        for(y = 0; y < BLK_Y_SIZE; y++)
        {
            for(x = 0; x < BLK_X_2BPP; x++)
            {
                ModulationModes[y+StartY][x+StartX] = BlockModMode;

                // if this is a stored value...
                if(((x^y)&1) == 0)
                {
                    ModulationVals[y+StartY][x+StartX] = ModulationBits & 3;
                    ModulationBits >>= 2;
                }
            }
        }
    }
    else if(Do2bitMode) // else if direct encoded 2bit mode - i.e. 1 mode bit per pixel
    {
        for(y = 0; y < BLK_Y_SIZE; y++)
        {
            for(x = 0; x < BLK_X_2BPP; x++)
            {
                ModulationModes[y+StartY][x+StartX] = BlockModMode;

                // double the bits so 0=> 00, and 1=>11
                if(ModulationBits & 1)
                {
                    ModulationVals[y+StartY][x+StartX] = 0x3;
                }
                else
                {
                    ModulationVals[y+StartY][x+StartX] = 0x0;
                }
                ModulationBits >>= 1;
            }
        }
    }
    else // else its the 4bpp mode so each value has 2 bits
    {
        for(y = 0; y < BLK_Y_SIZE; y++)
        {
            for(x = 0; x < BLK_X_4BPP; x++)
            {
                ModulationModes[y+StartY][x+StartX] = BlockModMode;

                ModulationVals[y+StartY][x+StartX] = ModulationBits & 3;
                ModulationBits >>= 2;
            }
        }
    }

    // make sure nothing is left over
    assert(ModulationBits==0);
}

/*!***********************************************************************
 @Function        InterpolateColours
 @Input            ColourP Colour of the top left block of the 2x2 group
 @Input            ColourQ Colour of the top right block
 @Input            ColourR Colour of the bottom left block
 @Input            ColourS Colour of the bottom right block
 @Input            Do2bitMode Non-zero for 2bpp (8 wide) blocks, zero for 4bpp
 @Input            x Pixel X position
 @Input            y Pixel Y position
 @Modified        Result Receives the interpolated colour, 8 bits per channel
 @Description    Bilinearly blends the four block colours (all A colours or
                 all B colours) for one pixel using integer arithmetic only,
                 then widens the result to 8 bits per channel.

                 NOTE: the source colours are expected in the 5554 layout
                 (5 bit R, G, B and 4 bit alpha).
 *************************************************************************/
static void InterpolateColours(const int ColourP[4],
                               const int ColourQ[4],
                               const int ColourR[4],
                               const int ColourS[4],
                               const int Do2bitMode,
                               const int x,
                               const int y,
                               int Result[4])
{
    int HorzWeight, VertWeight, HorzScale;
    int RgbShift, AlphaShift;
    int Channel;

    /*
     Fold the pixel position into the 2x2 block neighbourhood and measure it
     from the centre of the top left block. The vertical weight ends up in
     0..3 (out of 4).
     */
    VertWeight = ((y & 0x3) | ((~y & 0x2) << 1)) - BLK_Y_SIZE/2;

    if(Do2bitMode)
    {
        // horizontal weight in 0..7, out of 8
        HorzWeight = ((x & 0x7) | ((~x & 0x4) << 1)) - BLK_X_2BPP/2;
        HorzScale  = 8;

        // the wider block adds one more fractional bit to every channel
        RgbShift   = 2;
        AlphaShift = 1;
    }
    else
    {
        // horizontal weight in 0..3, out of 4
        HorzWeight = ((x & 0x3) | ((~x & 0x2) << 1)) - BLK_X_4BPP/2;
        HorzScale  = 4;

        RgbShift   = 1;
        AlphaShift = 0; // alpha already has the right precision
    }

    // blend along the top and bottom edges, then between the two
    for(Channel = 0; Channel < 4; Channel++)
    {
        const int Upper = ColourP[Channel] * HorzScale
                        + HorzWeight * (ColourQ[Channel] - ColourP[Channel]);
        const int Lower = ColourR[Channel] * HorzScale
                        + HorzWeight * (ColourS[Channel] - ColourR[Channel]);

        Result[Channel] = Upper * 4 + VertWeight * (Lower - Upper);
    }

    // drop the surplus fractional bits to reach 8 bit precision
    for(Channel = 0; Channel < 3; Channel++)
    {
        Result[Channel] >>= RgbShift;
    }
    Result[3] >>= AlphaShift;

    // sanity check
    for(Channel = 0; Channel < 4; Channel++)
    {
        assert(Result[Channel] < 256);
    }

    /*
     Convert from 5554 to 8888 by folding the top bits back into the bottom:
     RGB are 5.3 fixed point, alpha is 4.4.
     */
    for(Channel = 0; Channel < 3; Channel++)
    {
        Result[Channel] += Result[Channel] >> 5;
    }
    Result[3] += Result[3] >> 4;

    // 2nd sanity check
    for(Channel = 0; Channel < 4; Channel++)
    {
        assert(Result[Channel] < 256);
    }
}

/*!***********************************************************************
 @Function        GetModulationValue
 @Input            x Pixel X position
 @Input            y Pixel Y position
 @Input            Do2bitMode Non-zero for 2bpp data, zero for 4bpp
 @Input            ModulationVals Modulation codes of the 2x2 block group
 @Input            ModulationModes Modulation modes of the 2x2 block group
 @Modified        Mod Receives the modulation weight, in eighths (0..8)
 @Modified        DoPT Receives non-zero when the pixel is punch-through
 @Description    Looks up the blend weight between colours A and B for one
                 pixel. In the 2bpp interpolated modes, pixels that are not
                 stored take the rounded average of their stored neighbours.
 *************************************************************************/
static void GetModulationValue(int x,
                               int y,
                               const int Do2bitMode,
                               const int ModulationVals[8][16],
                               const int ModulationModes[8][16],
                               int *Mod,
                               int *DoPT)
{
    // weights (in eighths) for the four 2 bit codes
    static const int StandardWeights[4]     = {0, 3, 5, 8};
    // same, for the 4bpp punch-through encoding
    static const int PunchThroughWeights[4] = {0, 4, 4, 8};

    int Mode;
    int Weight;

    // Map X and Y into the local 2x2 block
    y = (y & 0x3) | ((~y & 0x2) << 1);
    x = Do2bitMode ? ((x & 0x7) | ((~x & 0x4) << 1))
                   : ((x & 0x3) | ((~x & 0x2) << 1));

    // punch-through only happens in the 4bpp branch below
    *DoPT = 0;

    Mode = ModulationModes[y][x];

    if(Mode == 0 || (Do2bitMode && ((x^y)&1) == 0))
    {
        // simple encoding, or a stored pixel of an interpolated 2bpp block
        Weight = StandardWeights[ModulationVals[y][x]];
    }
    else if(!Do2bitMode)
    {
        // 4bpp with the punch-through encoding
        Weight = PunchThroughWeights[ModulationVals[y][x]];

        *DoPT = ModulationVals[y][x] == PT_INDEX;
    }
    else
    {
        /*
         Interpolated 2bpp pixel. Only the neighbours the mode calls for are
         read, since those are the only ones guaranteed to have been stored.
         */
        switch(Mode)
        {
            case 1: // horizontal and vertical neighbours
                Weight = (StandardWeights[ModulationVals[y-1][x]] +
                          StandardWeights[ModulationVals[y+1][x]] +
                          StandardWeights[ModulationVals[y][x-1]] +
                          StandardWeights[ModulationVals[y][x+1]] + 2) / 4;
                break;

            case 2: // horizontal neighbours only
                Weight = (StandardWeights[ModulationVals[y][x-1]] +
                          StandardWeights[ModulationVals[y][x+1]] + 1) / 2;
                break;

            default: // vertical neighbours only
                Weight = (StandardWeights[ModulationVals[y-1][x]] +
                          StandardWeights[ModulationVals[y+1][x]] + 1) / 2;
                break;
        }
    }

    *Mod = Weight;
}

/*!***********************************************************************
 @Function        TwiddleUV
 @Input            YSize    Y dimension of the texture in blocks (or pixels)
 @Input            XSize    X dimension of the texture in blocks (or pixels)
 @Input            YPos    Y position, must be below YSize
 @Input            XPos    X position, must be below XSize
 @Returns        The twiddled offset of the block (or pixel) from the start
                 of the map
 @Description    Interleaves the bits of the two coordinates: Y supplies the
                 even bits and X the odd bits, for as many bits as the
                 smaller dimension has. Whatever is left of the coordinate
                 along the larger dimension is placed above them.

                 NOTE the dimensions of the texture must be a power of 2
 *************************************************************************/
// When non-zero TwiddleUV returns a plain row-major offset instead.
static int DisableTwiddlingRoutine = 0;

static U32 TwiddleUV(U32 YSize, U32 XSize, U32 YPos, U32 XPos)
{
    assert(YPos < YSize);
    assert(XPos < XSize);

    assert(POWER_OF_2(YSize));
    assert(POWER_OF_2(XSize));

    // Nasty hack to disable twiddling
    if(DisableTwiddlingRoutine)
        return (YPos* XSize + XPos);

    // the smaller side limits how many bit pairs can be interleaved
    const U32 ShortSide = (YSize < XSize) ? YSize : XSize;
    const U32 LongCoord = (YSize < XSize) ? XPos  : YPos;

    U32 Interleaved = 0;
    int BitsDone = 0;

    for(U32 SrcBit = 1, DstBit = 1; SrcBit < ShortSide; SrcBit <<= 1, DstBit <<= 2)
    {
        if(YPos & SrcBit)
            Interleaved |= DstBit;

        if(XPos & SrcBit)
            Interleaved |= (DstBit << 1);

        BitsDone += 1;
    }

    // prepend the bits of the longer coordinate that were not interleaved
    Interleaved |= ((LongCoord >> BitsDone) << (2*BitsDone));

    return Interleaved;
}

/*!***********************************************************************
 @Function        PVRDecompress
 @Input            pCompressedData The PVRTC block data to decode
 @Input            Do2bitMode true for 2bpp (8x4 blocks), false for 4bpp (4x4)
 @Input            XDim X dimension of the texture in pixels
 @Input            YDim Y dimension of the texture in pixels
 @Input            AssumeImageTiles Non-zero wraps coordinates at the image
                   edges, zero clamps them
 @Modified        pResultImage Receives the decoded image, 4 bytes per pixel
                  in R, G, B, A order, rows of XDim pixels
 @Description    Decompresses PVRTC to RGBA 8888, one pixel at a time. Each
                 pixel is decoded from the 2x2 group of blocks around it;
                 that group is only unpacked again when it differs from the
                 one used for the previous pixel.
 *************************************************************************/
static void PVRDecompress(AMTC_BLOCK_STRUCT *pCompressedData,
                       const bool Do2bitMode,
                       const int XDim,
                       const int YDim,
                       const int AssumeImageTiles,
                       unsigned char* pResultImage)
{
    const int BlockWidth = Do2bitMode ? BLK_X_2BPP : BLK_X_4BPP;

    // For MBX don't allow the sizes to get too small
    const int BlocksAcross = PVRT_MAX(2, XDim / BlockWidth);
    const int BlocksDown   = PVRT_MAX(2, YDim / BLK_Y_SIZE);

    // unpacked modulation data of the current 2x2 group of blocks
    int ModulationVals[8][16];
    int ModulationModes[8][16];

    // the current group of blocks, and the group that is currently unpacked
    AMTC_BLOCK_STRUCT *Neighbourhood[2][2];
    AMTC_BLOCK_STRUCT *Unpacked[2][2] = {{NULL, NULL}, {NULL, NULL}};

    // Low precision colours extracted from the blocks: Reps[0] is A, Reps[1] is B
    struct
    {
        int Reps[2][4];
    }BlockColours[2][2];

    // Interpolated A and B colours for the pixel
    int ColourA[4], ColourB[4];

    int Mod, DoPT;

    int px, py;
    int Row, Col, Channel;

    /*
     Step through the pixels of the image decompressing each one in turn

     Note that this is a hideously inefficient way to do this!
     */
    for(py = 0; py < YDim; py++)
    {
        for(px = 0; px < XDim; px++)
        {
            // Shift by half a block so the pixel maps to the top left block of its group
            int BlockLeft = px - BlockWidth/2;
            int BlockTop  = py - BLK_Y_SIZE/2;

            BlockLeft = LIMIT_COORD(BlockLeft, XDim, AssumeImageTiles);
            BlockTop  = LIMIT_COORD(BlockTop,  YDim, AssumeImageTiles);

            BlockLeft /= BlockWidth;
            BlockTop  /= BLK_Y_SIZE;

            // the other column and row of the group
            const int BlockRight  = LIMIT_COORD(BlockLeft+1, BlocksAcross, AssumeImageTiles);
            const int BlockBottom = LIMIT_COORD(BlockTop+1,  BlocksDown,   AssumeImageTiles);

            // Map to block memory locations
            Neighbourhood[0][0] = pCompressedData + TwiddleUV(BlocksDown, BlocksAcross, BlockTop,    BlockLeft);
            Neighbourhood[0][1] = pCompressedData + TwiddleUV(BlocksDown, BlocksAcross, BlockTop,    BlockRight);
            Neighbourhood[1][0] = pCompressedData + TwiddleUV(BlocksDown, BlocksAcross, BlockBottom, BlockLeft);
            Neighbourhood[1][1] = pCompressedData + TwiddleUV(BlocksDown, BlocksAcross, BlockBottom, BlockRight);

            // Unpack colours and modulation data only when the group has changed
            if(memcmp(Unpacked, Neighbourhood, sizeof(Unpacked)) != 0)
            {
                for(Row = 0; Row < 2; Row++)
                {
                    for(Col = 0; Col < 2; Col++)
                    {
                        Unpack5554Colour(Neighbourhood[Row][Col], BlockColours[Row][Col].Reps);

                        // each block fills its own quadrant of the modulation arrays
                        UnpackModulations(Neighbourhood[Row][Col],
                                          Do2bitMode,
                                          ModulationVals,
                                          ModulationModes,
                                          Col * BlockWidth, Row * BLK_Y_SIZE);
                    }
                }

                // remember which group is now unpacked
                memcpy(Unpacked, Neighbourhood, sizeof(Unpacked));
            }

            // decompress the pixel.  First compute the interpolated A and B signals
            InterpolateColours(BlockColours[0][0].Reps[0],
                               BlockColours[0][1].Reps[0],
                               BlockColours[1][0].Reps[0],
                               BlockColours[1][1].Reps[0],
                               Do2bitMode, px, py,
                               ColourA);

            InterpolateColours(BlockColours[0][0].Reps[1],
                               BlockColours[0][1].Reps[1],
                               BlockColours[1][0].Reps[1],
                               BlockColours[1][1].Reps[1],
                               Do2bitMode, px, py,
                               ColourB);

            GetModulationValue(px, py, Do2bitMode, (const int (*)[16])ModulationVals, (const int (*)[16])ModulationModes,
                               &Mod, &DoPT);

            // Blend A towards B by Mod eighths and store the pixel
            const unsigned int uPosition = (px+py*XDim)<<2;

            for(Channel = 0; Channel < 4; Channel++)
            {
                int Blended = ColourA[Channel] * 8 + Mod * (ColourB[Channel] - ColourA[Channel]);
                Blended >>= 3;

                // punch-through pixels are fully transparent
                if(Channel == 3 && DoPT)
                    Blended = 0;

                pResultImage[uPosition+Channel] = (U8)Blended;
            }
        }
    }
}

/*****************************************************************************
 End of file (pvr.cpp)
 *****************************************************************************/

